Correspondence analysis and reciprocal scaling applied to an interaction network

Author

Lisa Nicvert

Published

October 7, 2024

Code
library(here) # path management

# Data wrangling
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(tidytext) # reorder_within
library(effectsize) # Cramer's V

# Ade4
library(ade4)
library(adegraphics)

# Plot
library(ggplot2)
library(patchwork)
library(viridisLite) # Color palette
library(DT) # Display table

# Graph stuff
library(igraph)
library(tidygraph)
library(ggraph)

# Custom package
library(RSnetwork)

set.seed(42)

# Paths ---
figures_path <- here("figures/04_data_analysis")
output_path <- here("outputs/04_data_analysis")

Parameters

Code
paste(names(params), params, sep = ": ")
[1] "in_folder: ANDEAN_Peru1" "min_freq: 2"            
[3] "presabs: FALSE"          "log: FALSE"             
[5] "colco: darkred"          "colli: cornflowerblue"  
[7] "nf_ca: 3"                "alpha: 0.05"            
[9] "selection: LRT"         

Prepare data

Read data

We first read data that was cleaned before (from Dehling et al. (2021)).

Code
#|code-fold: show

in_folder_type <- file.path(here("outputs/01_clean_data"))

in_folder_full <- file.path(in_folder_type,
                            params$in_folder)

interactions_df <- read.csv(file.path(in_folder_full,
                                    "interactions.csv"))
plant_traits <- read.csv(file.path(in_folder_full,
                                   "plant_traits.csv"))
animal_traits <- read.csv(file.path(in_folder_full,
                                    "animal_traits.csv"))

Format data

We remove some redundant traits (BillLength and meanL).

Code
# Remove "useless" traits
animal_traits <- animal_traits |>
  select(-BillLength)

plant_traits <- plant_traits |>
  select(-meanL)

Then, we remove the rows where there are NA in the traits.

Code
animal_traits <- animal_traits |>  
  drop_na()

plant_traits <- plant_traits |> 
  drop_na()

# Keep the table with traits for export
animal_traits_codes <- animal_traits
plant_traits_codes <- plant_traits

We format the interaction matrix and remove the column with species names.

Code
#|code-fold: show

interactions_full <- df_to_matrix(interactions_df)

plant_traits <- plant_traits |> 
  column_to_rownames("plant_species_code") |> 
  select(-plant_species)

animal_traits <- animal_traits |> 
  column_to_rownames("animal_species_code") |> 
  select(-animal_species)

The initial data matrix has 52 rows (plants) and 61 columns (birds).

We remove plant and/or bird species that do not appear in the traits table.

Code
# ---
# Animals
# ---
final_animal_codes <- colnames(interactions_full) %in% rownames(animal_traits)

if (!all(final_animal_codes)) { # If not all final_animal_codes are true
  filtered_out_codes <- colnames(interactions_full)[!final_animal_codes]
  filtered_out_names <- unique(interactions_df$animal_species[interactions_df$animal_species_code %in% filtered_out_codes])
  
  msg <- paste0(paste(filtered_out_names, filtered_out_codes, sep = " ("), ")")
} else {
  msg <- ""
}

interactions <- interactions_full[ , final_animal_codes]

The following animal(s) will be filtered out: .

Code
# ---
# Plants
# ---
final_plant_codes <- rownames(interactions) %in% rownames(plant_traits)

if (!all(final_plant_codes)) { # If not all final_animal_codes are true
  filtered_out_codes <- rownames(interactions)[!final_plant_codes]
  filtered_out_names <- unique(interactions_df$plant_species[interactions_df$plant_species_code %in% filtered_out_codes])
  
  msg <- paste0(paste(filtered_out_names, filtered_out_codes, sep = " ("), ")")
} else {
  msg <- ""
}

interactions <- interactions[final_plant_codes, ]

The following plant(s) will be filtered out: .

Filter data

Here we will filter the matrix so that each species interacts with 2 or more other species.

Code
interactions <- filter_matrix(mat = interactions, 
                              thr = params$min_freq)
Code
animal_traits <- animal_traits[colnames(interactions),]
plant_traits <- plant_traits[rownames(interactions),]

params$presabs is FALSE so we don’t transform the data to presence-absence.

Code
interactions_orig <- interactions
Code
if (params$presabs) {
  interactions <- (interactions != 0)
  interactions <- ifelse(interactions, 1, 0)
  interactions <- as.data.frame(interactions)
}

params$log is FALSE so we don’t scale the data to log scale.

Code
if (params$log) {
  interactions <- log(interactions + 1)
}

Save data in a file:

Code
write.csv(interactions, file = file.path(output_path, "interactions.csv"))
write.csv(animal_traits, file = file.path(output_path, "animal_traits.csv"))
write.csv(plant_traits, file = file.path(output_path, "plant_traits.csv"))

Characterize filtered out species

Let’s explore the profile of the birds/plants that were filtered out compared to the profiles of all birds/plants. Here, we look at the column sums (i.e. the marginal abundance). The aim is to see if the removed species were not only interacting with few species, but also rare.

Code
bird_names_out <- colnames(interactions_full)[!(colnames(interactions_full) %in% colnames(interactions))]
bird_deg_out <- colSums(interactions_full[, bird_names_out])

bird_deg_out <- data.frame(degree = bird_deg_out) |> 
  rownames_to_column("bird")

gout <- ggplot(bird_deg_out) +
  geom_col(aes(x = reorder(bird, -degree), y = degree),
           fill = params$colco) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Filtered out birds abundance")

bird_deg_full <- colSums(interactions_full)

bird_deg_full <- data.frame(degree = bird_deg_full) |> 
  rownames_to_column("bird")

gall <- ggplot(bird_deg_full) +
  geom_col(aes(x = reorder(bird, -degree), y = degree),
           fill = params$colco) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Birds abundance")

gall + gout & 
  scale_y_log10(limits = c(1, max(bird_deg_full$degree))) &
  ylab("Weighted degree") &
  theme(axis.title.x = element_blank())

Code
plant_names_out <- rownames(interactions_full)[!(rownames(interactions_full) %in% rownames(interactions))]
plant_deg_out <- rowSums(interactions_full[plant_names_out, ])

plant_deg_out <- data.frame(degree = plant_deg_out) |> 
  rownames_to_column("plant")

gout <- ggplot(plant_deg_out) +
  geom_col(aes(x = reorder(plant, -degree), y = degree),
           fill = params$colli) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Filtered out plants abundance")

plant_deg_full <- rowSums(interactions_full)

plant_deg_full <- data.frame(degree = plant_deg_full) |> 
  rownames_to_column("plant")

gall <- ggplot(plant_deg_full) +
  geom_col(aes(x = reorder(plant, -degree), y = degree),
           fill = params$colli) +
  theme_linedraw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Plant abundance")

gall + gout  & scale_y_log10(limits = c(1, max(plant_deg_full$degree))) &
  ylab("Weighted degree") &
  theme(axis.title.x = element_blank())

Display data

The matrix we will analyze has 40 rows (plants) and 53 columns (birds).

Here is the original data matrix:

Code
interactions_df <- matrix_to_df(interactions, colnames = "birds")

interactions_df_points <- interactions_df |> 
    filter(value != 0)
Code
if(params$log) {
  msize <- max(interactions)*0.7
} else {
  msize <- max(interactions)*0.01
}

breaks <- c(1, 10, 100, 500)

# Visualize filtered matrix
plot_matrix(interactions, 
            legend_title = ifelse(params$log, 
                                  "ln(frequency)", "Frequency"), 
            max_size = msize,
            base_size = 9,
            breaks = breaks,
            trans = "log1p",
            xlab = "Birds") +
  theme(legend.key.width = unit(0.1, 'cm'),
        axis.line.x.top = element_line(colour = params$colco, 
                                       linewidth = 1),
        axis.ticks.x.top = element_line(colour = params$colco),
        axis.line.y = element_line(colour = params$colli, 
                                   linewidth = 1),
        axis.ticks.y = element_line(colour = params$colli))

Code
# Export plot
ggsave(file.path(figures_path, "matrix.jpeg"),
       width = 14, height = 12, 
       dpi = 600, units = "cm", bg = "white")
Code
# Create graph object
g <- graph_from_biadjacency_matrix(interactions, weighted = TRUE)

gtbl <- as_tbl_graph(g) |> 
  activate(nodes) |> 
  mutate(type = ifelse(type, "bird", "plant"))

nodes <- V(g)

# Reorder nodes as the original table
co_reordered <- unname(rank(nodes[colnames(interactions)]))
li_reordered <- unname(rank(nodes[rownames(interactions)]))

# Create centered coordinates
co_y <- scale(co_reordered, center = TRUE,
              scale = FALSE)
li_y <- scale(li_reordered, center = TRUE,
              scale = FALSE)

y <- cbind(c(li_y, co_y))
x <- rep(c(0, 1), c(length(li_y), length(co_y)))
lay <- data.frame(x, y)
Code
gtbl <- gtbl |> 
  mutate(degree = centrality_degree(weights = weight))

ggraph(gtbl, layout = lay) +
  geom_edge_fan(aes(width = weight),
                alpha = 0.6,
                show.legend = FALSE) +
  scale_edge_width(range = c(.2, 2.5)) +
  scale_color_manual(values = c(params$colco, params$colli)) +
  geom_node_point(aes(col = type, size = degree),
                  show.legend = FALSE) +
  scale_size_area(max_size = 3, trans = "log") +
  geom_node_text(aes(label = name), 
                 show.legend = FALSE,
                 size = 2.5,
                 nudge_x = c(rep(-0.03, nrow(interactions)),
                             rep(0.03, ncol(interactions)))) +
  theme_void()

Names and codes

Below are the correspondences between species codes and their names:

Code
# Transform for LateX

# Italicize
codes_plants$`Species name` <- paste0("\\textit{", codes_plants$`Species name`, "}")
codes_animals$`Species name` <- paste0("\\textit{", codes_animals$`Species name`, "}")

# Add newline character
codes_plants[, ncol(codes_plants)] <- paste0(codes_plants[, ncol(codes_plants)], "\\\\")
colnames(codes_plants)[ncol(codes_plants)] <- paste0(colnames(codes_plants)[ncol(codes_plants)], "\\\\")

codes_animals[, ncol(codes_animals)] <- paste0(codes_animals[, ncol(codes_animals)], "\\\\")
colnames(codes_animals)[ncol(codes_animals)] <- paste0(colnames(codes_animals)[ncol(codes_animals)], "\\\\")
Code
write.table(codes_plants, 
            file = file.path(output_path, "codes_plants.csv"),
            quote = FALSE, 
            sep = " & ",
            row.names = FALSE)
write.table(codes_animals, 
            file = file.path(output_path, "codes_animals.csv"),
            quote = FALSE, 
            sep = " & ",
            row.names = FALSE)

Correspondence analysis

Let’s first perform a chi-squared test to check if there is structure in the data.

Code
(chsq <- chisq.test(interactions, simulate.p.value = TRUE))

    Pearson's Chi-squared test with simulated p-value (based on 2000
    replicates)

data:  interactions
X-squared = 17784, df = NA, p-value = 0.0004998
Code
(V <- cramers_v(interactions))
Cramer's V (adj.) |       95% CI
--------------------------------
0.29              | [0.26, 1.00]

- One-sided CIs: upper bound fixed at [1.00].

The p-value is 5^{-4}.

Now let’s perform CA. For this analysis, we will keep 3 axes.

Code
ca <- dudi.coa(interactions, 
                scannf = FALSE, 
                nf = params$nf_ca)

CA eigenvalues and square root of eigenvalues:

Code
# Eigenvalues
ca$eig
 [1] 6.191934e-01 4.167874e-01 3.928879e-01 3.039191e-01 2.773904e-01
 [6] 2.581142e-01 1.784613e-01 1.448607e-01 1.294030e-01 1.157875e-01
[11] 1.027157e-01 8.850992e-02 8.342531e-02 6.946602e-02 5.637214e-02
[16] 4.874483e-02 4.126778e-02 3.824801e-02 3.333744e-02 3.185438e-02
[21] 2.555155e-02 2.300389e-02 2.218657e-02 2.084826e-02 1.716005e-02
[26] 1.435705e-02 1.262480e-02 1.193993e-02 7.404297e-03 5.325839e-03
[31] 4.704812e-03 3.494085e-03 2.855760e-03 2.200572e-03 1.676954e-03
[36] 9.899936e-04 6.284900e-04 3.436072e-04 1.314759e-07
Code
# Square roots of eigenvalues
sqrt(ca$eig)
 [1] 0.7868884498 0.6455907415 0.6268077170 0.5512885639 0.5266786274
 [6] 0.5080493838 0.4224467983 0.3806057051 0.3597263539 0.3402755650
[11] 0.3204929191 0.2975061708 0.2888343968 0.2635640656 0.2374281810
[16] 0.2207823099 0.2031447240 0.1955709726 0.1825854443 0.1784779585
[21] 0.1598485306 0.1516703401 0.1489515770 0.1443892510 0.1309963771
[26] 0.1198209041 0.1123601228 0.1092700031 0.0860482228 0.0729783458
[31] 0.0685916339 0.0591107831 0.0534393144 0.0469102545 0.0409506254
[36] 0.0314641633 0.0250697037 0.0185366457 0.0003625961

Below are the corresponding eigenvalues graphs:

Code
plot_eig(ca$eig) + 
  ggtitle("Eigenvalues") +
  theme(axis.title.y = element_blank())

Code
plot_eig(sqrt(ca$eig)) + 
  ggtitle("Square roots of eigenvalues") +
  theme(axis.title.y = element_blank())

Code
plot_eig(cumsum(ca$eig)/sum(ca$eig)) +
  ggtitle("Cumulative sum of relative eigenvalues") +
  theme(axis.title.y = element_blank())

Below is the interaction matrix reordered with the first axis of the CA:

Code
interactions_reordered <- interactions[order(ca$li[, 1]),
                                       order(ca$co[, 1])]

plot_matrix(interactions_reordered, 
            legend_title = ifelse(params$log, 
                                  "ln(frequency)", "Frequency"), 
            max_size = msize,
            base_size = 9,
            breaks = breaks,
            trans = "log1p",
            xlab = "Birds") +
  theme(legend.key.width = unit(0.1, 'cm'),
        axis.line.x.top = element_line(colour = params$colco,
                                       # lineend = "butt",
                                       linewidth = 1),
        axis.ticks.x.top = element_line(colour = params$colco),
        axis.line.y = element_line(colour = params$colli, 
                                   # lineend = "square",
                                   linewidth = 1),
        axis.ticks.y = element_line(colour = params$colli))

Code
# Export plot
ggsave(file.path(figures_path, "matrix_reordered.jpeg"),
       width = 14, height = 12, 
       dpi = 600, units = "cm", bg = "white")
Code
# Reorder CA coordinates as the graph nodes
co_reordered <- ca$co[nodes, 1]
co_reordered <- co_reordered[!is.na(co_reordered)]

li_reordered <- ca$li[nodes, 1]
li_reordered <- li_reordered[!is.na(li_reordered)]

# Get the ranks
co_rank <- rank(ca$co[,1])
li_rank <- rank(li_reordered)

# Transformto centered coordinates
co_y <- scale(co_rank, center = TRUE,
              scale = FALSE)
li_y <- scale(li_rank, center = TRUE,
              scale = FALSE)

y2 <- cbind(c(li_y, co_y))

lay2 <- lay |> 
  mutate(y = y2)
Code
ggraph(gtbl, layout = lay2) +
  geom_edge_fan(aes(width = weight),
                alpha = 0.6,
                show.legend = FALSE) +
  scale_edge_width(range = c(.2, 2.5)) +
  scale_color_manual(values = c(params$colco, params$colli)) +
  geom_node_point(aes(col = type, size = degree),
                  show.legend = FALSE) +
  scale_size_area(max_size = 3, trans = "log") +
  geom_node_text(aes(label = name), 
                 show.legend = FALSE,
                 size = 2.5,
                 nudge_x = c(rep(-0.03, nrow(interactions)),
                             rep(0.03, ncol(interactions)))) +
  theme_void()

We can also rearrange the interaction matrix to highlight nestedless. For that, we arrange by decreasing weighted degree:

Code
interactions_nested <- interactions[order(rowSums(interactions)),
                                    order(colSums(interactions))]

plot_matrix(interactions_nested, 
            legend_title = ifelse(params$log, 
                                  "ln(frequency)", "Frequency"), 
            max_size = msize,
            base_size = 9,
            breaks = breaks,
            trans = "log1p",
            xlab = "Birds") +
  theme(legend.key.width = unit(0.1, 'cm'),
        axis.line.x.top = element_line(colour = params$colco, 
                                       linewidth = 1),
        axis.ticks.x.top = element_line(colour = params$colco),
        axis.line.y = element_line(colour = params$colli, 
                                   linewidth = 1),
        axis.ticks.y = element_line(colour = params$colli))

We can visualize the coordinates ordered along the axes and scan for gaps (as advised by Dam et al. (2021)):

Code
li_coord <- ca$li |> 
  rownames_to_column("plant")
  
li_coord <- li_coord |>
  pivot_longer(cols = 2:ncol(li_coord),
               names_to = "axis")

gli <- ggplot(li_coord) +
  geom_point(aes(x = reorder_within(plant, by = value, within = axis, 
                                    decreasing = TRUE), 
                 y = value, group = axis),
             col = params$colli) +
  xlab("Plants")
Code
co_coord <- ca$co |> 
  rownames_to_column("bird")
  
co_coord <- co_coord |>
  pivot_longer(cols = 2:ncol(co_coord),
               names_to = "axis") |> 
  mutate(axis = str_replace(axis, "Comp", "Axis"))

gco <- ggplot(co_coord) +
  geom_point(aes(x = reorder_within(bird, by = value, within = axis, 
                                    decreasing = TRUE), 
                 y = value, group = axis),
             col = params$colco) +
  xlab("Birds")
Code
gli / gco  &
  facet_grid(cols = vars(axis), scales = "free") &
  ylab("Coordinate") &
  theme_linedraw() &
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

To plot the results in a biplot, there are two scalings available. They are depicted below.

Code
xlim <- c(-6, 10)
ylim <- c(-6, 10)
multiplot(indiv_row = ca$li, indiv_col = ca$c1,
          row_color = params$colli, col_color = params$colco, eig = ca$eig,
          max.overlaps = 20,
          xlim = xlim, ylim = ylim,
          title = "Plants at the mean of birds (plants distances conserved)")
Warning: ggrepel: 31 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

Code
multiplot(indiv_row = ca$li, indiv_col = ca$c1,
          x = 2, y = 3,
          row_color = params$colli, col_color = params$colco, eig = ca$eig,
          max.overlaps = 20,
          xlim = xlim, ylim = ylim,
          title = "Plants at the mean of birds (plants distances conserved)")
Warning: ggrepel: 35 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

Code
xlim <- c(-6, 10)
ylim <- c(-6, 10)

multiplot(indiv_row = ca$l1, indiv_col = ca$co,
          row_color = params$colli, col_color = params$colco, eig = ca$eig,
          xlim = xlim, ylim = ylim,
          title = "Birds at the mean of plants (birds distances conserved)")
Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

Code
multiplot(indiv_row = ca$l1, indiv_col = ca$co,
          x = 2, y = 3,
          xlim = xlim, ylim = ylim,
          row_color = params$colli, col_color = params$colco, eig = ca$eig,
          title = "Birds at the mean of plants (birds distances conserved)")
Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider
increasing max.overlaps
Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

Reciprocal scaling

We will now perform reciprocal scaling (Thioulouse and Chessel 1992).

Code
recscal <- reciprocal.coa(ca)
Code
multiplot(indiv_row = recscal,
          indiv_row_lab = paste(recscal$Row, recscal$Col, sep = "-"),
          eig = ca$eig,
          alphapoints = 0.5)
Warning: ggrepel: 330 unlabeled data points (too many overlaps). Consider
increasing max.overlaps

This graph shows the correspondences of species interactions in the multivariate space. The correspondences are related to the CA coordinates as follows (Thioulouse and Chessel 1992):

\[h_k(i, j) = \frac{u^\star_{ik} + v^\star_{jk}}{\sqrt{2 \lambda_k \mu_k}}\]

To put it differently, each correspondence is at the “mean” position between the li(\(L_k(i)\)) and the co (scaled with a scaling factor).

The standard deviations for plant species \(i\) is (Thioulouse and Chessel 1992):

\[s_{ik} = \sqrt{\frac{1}{2\lambda_k\mu_k} \left( \frac{1}{y_{i \cdot}} \sum_{j = 1}^c \left(y_{ij} {v^\star_{jk}}^2 \right) - \lambda_k {u^\star_{ik}}^2 \right)}\]

And the standard deviations for bird species \(j\) is (Thioulouse and Chessel 1992):

\[s_{jk} = \sqrt{\frac{1}{2\lambda_k\mu_k} \left( \frac{1}{y_{\cdot j}} \sum_{i = 1}^r \left(y_{ij} {u^\star_{ik}}^2 \right) - \lambda_k {v^\star_{jk}}^2 \right)}\] NB: these standard deviations can also be computed directly as the standard deviations of the \(h_k(i, j)\).

The graphs below show plants and birds interaction niches in the multivariate space.

Code
# Create viridis palette for rows
colrow <- viridis(nrow(interactions), option = "G", end = 0.9)

# Order palette so that colors match position on axis 1
colrow <- colrow[rank(ca$li[[1]])]

# png(file.path(figures_path, "plant_ell.png"),
#     bg = "white",
#     res = 600,
#     width = 15, height = 15, units = "cm")
plot_reciprocal(recscal = recscal,
                dudi = ca,
                col = colrow,
                group = "li",
                xax = 1, yax = 2)

Code
# dev.off()

plot_reciprocal(recscal = recscal,
                dudi = ca,
                col = colrow,
                group = "li",
                xax = 2, yax = 3)

Code
# Create viridis palette for columns
colco <- viridis(ncol(interactions), option = "F", end = 0.9)

# Order palette so that colors match position on axis 1
colco <- colco[rank(ca$co[[1]])]

# png(file.path(figures_path, "birds_ell.png"),
#     bg = "white",
#     res = 600,
#     width = 15, height = 15, units = "cm")
plot_reciprocal(recscal = recscal, 
                dudi = ca,
                group = "co", 
                xax = 1, yax = 2,
                col = colco)

Code
# dev.off()

plot_reciprocal(recscal = recscal, 
                dudi = ca,
                group = "co", 
                xax = 2, yax = 3,
                col = colco)

Linear model

Here we try to model niche width as a function of the position in the axes of the multivariate analysis.

A linear model is also constructed and chosen as the best model between \[s_k = a~m_k + c \] and \[s_k = a~m_k + b~m_k^2 + c\]

Code
ts <- 2.5 # labels size
ps <- 1 # points size

ax <- 1:3

lmplots <- list()

gbirds2 <- list()
gplants2 <- list()

lmbirds <- vector(mode = "list", length = length(ax))
names(lmbirds) <- paste0("ax", ax, "_birds")

lmplants <- vector(mode = "list", length = length(ax))
names(lmplants) <- paste0("ax", ax, "_plants")

msd <- get_mean_sd(recscal, ax = 1:params$nf_ca)
Code
for(a in ax){
  # Get the dataframes with mean/standard deviation for a given axis
  msd_birds <- data.frame(birds = rownames(msd$colsd),
                          mean = msd$colmean[, a],
                          sd = msd$colsd[, a])
  msd_plants <- data.frame(plants = rownames(msd$rowsd),
                           mean = msd$rowmean[, a],
                           sd = msd$rowsd[, a])
  
  # Linear models for birds ---
  # Construct models
  lmsimple <- lm(sd ~ mean, msd_birds)
  lm2 <- lm(sd ~ mean + I(mean^2), msd_birds)
  
  # Get best model for birds
  lmb <- get_best_model(lmsimple, lm2, alpha = params$alpha)
  
  lab_birds <- lm_labels(lmb, a)
  
  # Get predictions
  pred <- "confidence"
  birds_pred <- get_pred(dat_predict = msd_birds$mean, lmb, 
                         interval = pred, level = 1 - params$alpha)
  
  # Linear models for plants ---
  # Construct models
  lmsimple <- lm(sd ~ mean, msd_plants)
  lm2 <- lm(sd ~ mean + I(mean^2), msd_plants)
  
  # Get best model for plants
  lmp <- get_best_model(lmsimple, lm2, alpha = params$alpha)

  lab_plants <- lm_labels(lmp, a)
  
  # Get predictions
  plants_pred <- get_pred(dat_predict = msd_plants$mean, lmp,
                          interval = pred, level = 1 - params$alpha)
  
  # Merge datasets ---
  msd_plants <- msd_plants |> 
    rename("species" = "plants") |> 
    mutate(type = "Plants")
  msd_birds <- msd_birds |> 
    rename("species" = "birds") |> 
    mutate(type = "Birds")
  msd_merge <- rbind(msd_plants, msd_birds)
  
  plants_pred <- plants_pred |> 
    mutate(type = "Plants")
  birds_pred <- birds_pred |> 
    mutate(type = "Birds")
  pred <- rbind(plants_pred, birds_pred)
  
  lab_df <- rbind(lab_birds, lab_plants)
  lab_df$type <- c("Birds", "Plants")
  
  # Plots sd versus mean ---
  lmplots[[a]] <- plot_lm_mean_sd(msd_merge, 
                                  dat_pred = pred,
                                  text_size = ts,
                                  points_size = ps,
                                  nudge_x = 0.1,
                                  col = c(params$colco, params$colli), 
                                  lab = lab_df,
                                  ylab = paste0("Niche breadth (standard deviation on axis ", a, ")"), 
                                  xlab = paste0("Niche optimum (mean position on axis ", a, ")"), 
                                  facet = "type")
  
  lmbirds[[a]] <- lmb
  lmplants[[a]] <- lmp
  
  # Plot sd versus number of interactions ---
  gb2 <- ggplot() +
    geom_point(aes(y = msd_birds$sd, x = colSums(interactions != 0)),
               col = params$colco, alpha = 0.5) +
    theme_linedraw() +
    ggtitle(paste("Bird niche width versus number of interacting partners on axis", a)) +
    xlim(c(0, max(colSums(interactions != 0))))
  
  gp2 <- ggplot() +
    geom_point(aes(y = msd_plants$sd, x = rowSums(interactions != 0)),
               col = params$colli, alpha = 0.5) +
    theme_linedraw() +
    ggtitle(paste("Plant niche width versus number of interacting partners on axis", a)) +
    xlim(c(0, max(rowSums(interactions != 0))))
  
  gbirds2 <- c(gbirds2, list(gb2))
  gplants2 <- c(gplants2, list(gp2))
  
}
Code
lmplots[[1]] + 
  ylim(0, 1.8) +
  theme(plot.margin = margin(5, 10, 0, 5),
        text = element_text(size = ts*3.5))

Code
# Export plot
ggsave(file.path(figures_path, "lm_ax1.jpeg"),
       width = 18, height = 8, 
       dpi = 600, units = "cm", bg = "white")

lmplots[[2]] + 
  theme(plot.margin = margin(5, 10, 0, 5),
        text = element_text(size = ts*3.5))

Code
# Export plot
ggsave(file.path(figures_path, "lm_ax2.jpeg"),
       width = 18, height = 8, 
       dpi = 600, units = "cm", bg = "white")

lmplots[[3]] + 
  theme(plot.margin = margin(5, 10, 0, 5),
        text = element_text(size = ts*3.5))

Relationship with traits

In order to interpret the axes, we can compute the correlations between the traits tables and the coordinates of birds/plants on each axis.

Code
corli <- cor(ca$li, plant_traits)
corli <- t(corli)
Code
corco <- cor(ca$co, animal_traits)
corco <- t(corco)

Below is the a posteriori correlation circle between the axes and the traits:

Code
rownames(corli) <- c("fruit diameter", "plant height", "crop mass")
rownames(corco) <- c("Kipp's index", "bill width", "body mass")

plot_corcircle(cor = corli,
               col = params$colli,
               cor2 = corco,
               lty = "longdash",
               lty2 = "solid",
               col2 = params$colco,
               eig = ca$eig)

Code
ggsave(file.path(figures_path, "corcircle.svg"),
       width = 15, height = 13, units = "cm")

plot_corcircle(cor = corli,
               col = params$colli,
               xax = 2, yax = 3,
               cor2 = corco,
               lty = "solid",
               lty2 = "longdash",
               col2 = params$colco,
               eig = ca$eig)

Code
corli |> 
  DT::datatable() |> 
  DT::formatRound(1:ncol(corli), digits = 2)
Code
corco |> 
  DT::datatable() |> 
  DT::formatRound(1:ncol(corco), digits = 2)

Linear model with residuals

In this section, we construct linear models to predict the residuals of the regression of multivariate coordinates on known species traits. The aim is to look for a missing trait by examining the relationship between residuals and multivariate coordinates.

Plants (rows)

We first construct the linear model predicting plants CA coordinates from their traits.

Code
lm_li <- apply(ca$li, MARGIN = 2, 
               function(li) lm(li ~ plant_traits$meanHeight + plant_traits$CropMass + plant_traits$meanD1))

names(lm_li) <- paste0("Ax", 1:length(lm_li))

lapply(lm_li, summary)
$Ax1

Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass + 
    plant_traits$meanD1)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.2745 -1.2198 -0.3298  0.3438  5.3451 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)  
(Intercept)             -1.6364528  1.1618793  -1.408   0.1676  
plant_traits$meanHeight  0.1267155  0.1265614   1.001   0.3234  
plant_traits$CropMass   -0.0002272  0.0003218  -0.706   0.4847  
plant_traits$meanD1      0.2932961  0.1257258   2.333   0.0254 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.093 on 36 degrees of freedom
Multiple R-squared:  0.2388,    Adjusted R-squared:  0.1754 
F-statistic: 3.765 on 3 and 36 DF,  p-value: 0.01895


$Ax2

Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass + 
    plant_traits$meanD1)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.6708 -0.7660 -0.2094  0.3716  4.1495 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)  
(Intercept)              1.3079644  0.6397991   2.044   0.0483 *
plant_traits$meanHeight -0.1506873  0.0696922  -2.162   0.0373 *
plant_traits$CropMass   -0.0001924  0.0001772  -1.086   0.2848  
plant_traits$meanD1      0.0598534  0.0692320   0.865   0.3930  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.152 on 36 degrees of freedom
Multiple R-squared:  0.2684,    Adjusted R-squared:  0.2075 
F-statistic: 4.403 on 3 and 36 DF,  p-value: 0.00974


$Ax3

Call:
lm(formula = li ~ plant_traits$meanHeight + plant_traits$CropMass + 
    plant_traits$meanD1)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3713 -0.8794 -0.2794  0.6769  4.8751 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)  
(Intercept)              1.8489504  0.7363341   2.511   0.0167 *
plant_traits$meanHeight -0.1424334  0.0802076  -1.776   0.0842 .
plant_traits$CropMass    0.0003261  0.0002039   1.599   0.1186  
plant_traits$meanD1     -0.1084142  0.0796780  -1.361   0.1821  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.326 on 36 degrees of freedom
Multiple R-squared:  0.1145,    Adjusted R-squared:  0.04072 
F-statistic: 1.552 on 3 and 36 DF,  p-value: 0.2179

Then, we pot the relationship between CA coordinates and the residuals of these models.

Code
res_list <- lapply(seq_along(lm_li), 
                 function(i) data.frame(residuals = residuals(lm_li[[i]]), coord = ca$li[, i]))
  
for (i in 1:length(res_list)) {
  dfi <- res_list[[i]]
  
  gp <- ggplot(dfi, aes(x = coord, y = residuals)) +
    geom_point(col = params$colli) +
    geom_smooth(method = "lm", col = params$colli) +
    theme_linedraw() +
    ggtitle(paste("Axis", i))
  print(gp)
}
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Birds (columns)

We do the same for birds.

Code
lm_co <- apply(ca$co, MARGIN = 2, 
               function(co) lm(co ~ animal_traits$Bodymass + animal_traits$BillWidth + animal_traits$KippsIndex))

names(lm_co) <- paste0("Ax", 1:length(lm_co))

lapply(lm_co, summary)
$Ax1

Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth + 
    animal_traits$KippsIndex)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.8305 -0.4059 -0.0376  0.3589  4.0612 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)              -4.187964   0.558502  -7.499 1.12e-09 ***
animal_traits$Bodymass   -0.002276   0.001405  -1.620  0.11169    
animal_traits$BillWidth   0.323581   0.039214   8.252 7.90e-11 ***
animal_traits$KippsIndex  7.605599   2.272965   3.346  0.00158 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.036 on 49 degrees of freedom
Multiple R-squared:  0.7033,    Adjusted R-squared:  0.6851 
F-statistic: 38.71 on 3 and 49 DF,  p-value: 5.669e-13


$Ax2

Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth + 
    animal_traits$KippsIndex)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0904 -0.4652 -0.0623  0.2344  3.5515 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.6291476  0.5139784   3.170 0.002630 ** 
animal_traits$Bodymass    0.0001647  0.0012931   0.127 0.899161    
animal_traits$BillWidth   0.0372378  0.0360882   1.032 0.307206    
animal_traits$KippsIndex -8.5095565  2.0917654  -4.068 0.000172 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9534 on 49 degrees of freedom
Multiple R-squared:  0.2691,    Adjusted R-squared:  0.2244 
F-statistic: 6.014 on 3 and 49 DF,  p-value: 0.001427


$Ax3

Call:
lm(formula = co ~ animal_traits$Bodymass + animal_traits$BillWidth + 
    animal_traits$KippsIndex)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7002 -0.4934 -0.3312  0.2064  3.9726 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)  
(Intercept)               1.476814   0.554260   2.664   0.0104 *
animal_traits$Bodymass    0.002303   0.001394   1.652   0.1050  
animal_traits$BillWidth  -0.056295   0.038916  -1.447   0.1544  
animal_traits$KippsIndex -3.628446   2.255700  -1.609   0.1141  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.028 on 49 degrees of freedom
Multiple R-squared:  0.1273,    Adjusted R-squared:  0.07385 
F-statistic: 2.382 on 3 and 49 DF,  p-value: 0.08075
Code
res_list <- lapply(seq_along(lm_co), 
                 function(i) data.frame(residuals = residuals(lm_co[[i]]), coord = ca$co[, i]))
  
for (i in 1:length(res_list)) {
  dfi <- res_list[[i]]
  
  gp <- ggplot(dfi, aes(x = coord, y = residuals)) +
    geom_point(col = params$colco) +
    geom_smooth(method = "lm", col = params$colco) +
    theme_linedraw() +
    ggtitle(paste("Axis", i))
  print(gp)
}
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

References

Dam, Alje van, Mark Dekker, Ignacio Morales-Castilla, Miguel á. Rodríguez, David Wichmann, and Mara Baudena. 2021. “Correspondence Analysis, Spectral Clustering and Graph Embedding: Applications to Ecology and Economic Complexity.” Scientific Reports 11 (1): 8926. https://doi.org/10.1038/s41598-021-87971-9.
Dehling, D. Matthias, Irene M. A. Bender, Pedro G. Blendinger, Marcia C. Muñoz, Marta Quitián, Francisco Saavedra, Vinicio Santillán, Katrin Böhning-Gaese, Eike-Lena Neuschulz, and Matthias Schleuning. 2021. “ANDEAN Frugivory: Data on Plantbird Interactions and Functional Traits of Plant and Bird Species from Montane Forests Along the Andes.” https://doi.org/10.5061/DRYAD.WM37PVMN5.
Thioulouse, Jean, and Daniel Chessel. 1992. “A Method for Reciprocal Scaling of Species Tolerance and Sample Diversity.” Ecology 73 (2): 670–80. https://doi.org/10.2307/1940773.